/*
HOSPITAL CORPORATIZATION

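THIS FILE LOADS HOSPITAL CLAIMS FOR ALL PATIENTS IN TREATED OR POTENTIAL CONTROL HOSPITALS AND DROPS THE REMAINING HOSPITALS. IT FLAGS WHETHER EACH ADMISSION IS NON-DEFERRABLE AND WHETHER IT CAME THROUGH THE ED, AND RETAINS ONLY NON-DEFERRABLE ADMISSIONS. IT FLAGS BENEFICIARIES WHO HAVE ONE YEAR OF ENROLLMENT HISTORY (enr) AND COMPUTES AGE AT ADMISSION (ageatadmsn); NOTE THAT NO ROWS ARE DROPPED ON ENROLLMENT OR AGE <65 IN THIS FILE, SO THOSE RESTRICTIONS MUST BE APPLIED DOWNSTREAM USING THESE VARIABLES. IT CREATES FLAGS FOR READMISSION AND FOR 1-YEAR DX HISTORY AND CARRIES THE FINAL DEATH DATE (FOR MORTALITY OUTCOMES). SAVES FINAL FILE TO BE USED IN REGS.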

DEC 24, 2023: PREPARE FOR REPLICATION PACKAGE.
*/

********************************************************************************
**PREAMBLE;
*Resets the session, defines the folder globals and the year range / variable lists;
*used by every later step, then loads the project's program definitions.;
*clear all also drops any programs in memory, so the master program file has to be run after it;
clear all
set more off 
*placeholder paths: replace with the actual folders before running;
global codepath "<folder containing code files>"
global datapath "<folder containing project data files>"
global claims "<folder containing claims files>"

*firstyr-lastyr: claim years looped over when building the admission sample;
*prevyr: one year before firstyr; the beneficiary and history loops start here;
local firstyr 2009
local lastyr 2017
local prevyr = `firstyr'-1	


*vars: columns read from the yearly inpatient claim file;
*bm_vars: columns read from the yearly beneficiary summary file;
*conds: not referenced anywhere else in this file;
local vars bene_id clm_id admsn_dt dschrgdt provider typesrvc fac_type from_dt thru_dt drg_cd stus_cd
local bm_vars bene_id buyin* sex race death_dt bene_dob 
local conds card_surg oth_surg cancer psych oth_unprof

*Run master program file;
*NOTE(review): presumably defines ident_nondef, which is called below but not defined in this file - confirm;
qui do "$codepath/0_prog_master_vfinal.do"

*------------------------------------------------------------------------------;
*Step 1: build the DRG -> weight lookup tables that are merged onto claims in Step 3;

*--- 2010 table: the first spreadsheet row supplies the variable names;
import excel using "$datapath/utility/table5FR-2010.xls", firstrow clear
*lower-case every variable name in one pass;
rename *, lower
rename (msdrg weights) (drg_cd drgwt)
*force: non-numeric DRG entries become missing and are removed on the next line;
destring drg_cd, force replace
drop if drg_cd==.
tempfile drgwt10
save `drgwt10', replace

*--- 2015 table: the DRG code sits in column TABLE and the weight in column G;
import excel using "$datapath/utility/table5FR-2015.xlsx", firstrow clear
drop J-N
rename (TABLE G) (drg_cd drgwt)
keep drg_cd drgwt
*first data row is discarded (presumably the sheet's own header row - confirm against the file);
drop in 1
destring drg_cd drgwt, force replace
drop if drg_cd==.
tempfile drgwt15
save `drgwt15', replace

*------------------------------------------------------------------------------;
*Step 2: create list of treated and comparison hospitals;
*For each hospital list (suffix is / ss) this collapses the hospital-year panel to one row;
*per provider holding the conversion year, matching weight, stratum and treatment flag,;
*and stores it in tempfile base_is / base_ss.;

cd "$datapath"

local types is ss 

foreach type of local types{

	use FULLsample_for_atul`type', clear
	
	*keep the weight shipped with the sample under a new name; the merge below brings in the updated wt_cemm_s;
	ren wt_cemm_s wt_cemm_orig 

	merge 1:1 num_prvdr_num year using fulldata_wts`type'
	
	*hospital-years present only in the weights file are discarded;
	drop if _m==2
	drop _m

	ren num_prvdr_num provider
	sort provider year 

	*conversion year = year in which the bought flag switches from 0 to 1.;
	*the first row of a provider has no previous row, so a hospital that is already bought in its;
	*first observed year gets no conv_year;
	gen conv_year=.
	by provider: replace conv_year= year if bought_1`type'==1 & bought_1`type'[_n-1]==0

	*one row per provider. conv_year is non-missing only on switch rows, so its mean is the;
	*conversion year when there is a single switch (it would be an average if a provider switched twice);
	collapse (mean) conv_year never wt_cemm_orig wt_cemm_s cem_strata (max) bought_1`type', by(provider)

	*provider becomes a 6-character string with leading zeros so it can be merged with the claims;
	tostring provider, force replace 
	replace provider = "0" + provider if length(provider)==5
	replace provider = "00" + provider if length(provider)==4

	*FIX: Stata treats missing as larger than any number, so wt_cemm_s>0 alone is true for;
	*providers with no weight (e.g. never found in the weights file) and would flag them as matched.;
	gen matched = wt_cemm_s>0 & !missing(wt_cemm_s)
	
	ren wt_cemm_s wt_cemm_`type'
	ren cem_strata strata_`type'
	ren never never_`type'

	*full variable name instead of the abbreviation bought, which breaks if another bought* variable exists;
	tab bought_1`type' matched 
	tab conv_year bought_1`type' if matched==1

	tempfile base_`type' 
	save `base_`type'', replace 

}

*Stack the two provider-level lists built above;
use `base_is', clear
append using `base_ss'

*rows coming from one list have no values for the other list's variables: set those to 0;
foreach v of varlist bought_1ss bought_1is never_is never_ss strata_* wt_cemm_* {
	replace `v' = 0 if missing(`v')
}

*full list (matched and unmatched hospitals);
tempfile master_hosp_list
save `master_hosp_list', replace

*only keep hospitals which are matched treated or comparison units over 2013,2016.
drop if matched!=1

*this is the hospital list merged onto the claims in Step 3;
tempfile base
save `base', replace

********************************************************************************
*Step 3: grab nondef admissions for relevant hospitals in desired period;
*For every claim year: build an ED indicator from the revenue-center file, load the claim file,;
*restrict to the hospitals in the matched list, to facility types 1/8 and service type 1,;
*keep only non-deferrable admissions, attach DRG weights and Elixhauser flags, and stack the;
*year onto the running sample held in tempfile ip_sample.;

forvalues yr = `firstyr'/`lastyr' {

	di "*******************"
	di "Year = `yr'"

	cd "$claims/ip/`yr'/"

	*identify cases that originated in the ed: revenue center 0450-0459 or 0981;
	use if inrange(rev_cntr,"0450","0459") | rev_cntr=="0981" using ipr`yr', clear

	di "Keep only one row per claim"
	bysort bene_id clm_id: keep if _n==1
	keep bene_id clm_id
	gen ed = 1

	di "number of ED visits"
	count

	di "save revenue file with ed dummy"
	tempfile ip_ed`yr'
	save `ip_ed`yr'', replace

	*claim-level file; the first diagnosis code is treated as the principal diagnosis;
	use `vars' icd_dgns_cd* using "$claims/ip/`yr'/ipc`yr'", clear
	rename icd_dgns_cd1 prncpal_dgns_cd

	*limit to first 10 dx codes to be consistent over full period;
	*capture: when any of these variables is absent the drop fails silently and nothing is dropped;
	cap drop icd_dgns_cd11 icd_dgns_cd12 icd_dgns_cd13 icd_dgns_cd14 icd_dgns_cd15 icd_dgns_cd16 icd_dgns_cd17 icd_dgns_cd18 icd_dgns_cd19 icd_dgns_cd20 icd_dgns_cd21 icd_dgns_cd22 icd_dgns_cd23 icd_dgns_cd24 icd_dgns_cd25

	di "keep only treated or potential control hospitals"
	merge m:1 provider using `base', keepusing(conv_year bought_1* never_* wt_cemm_* strata_*) keep(match) nogenerate

	di "merge ed indicator"
	merge m:1 bene_id clm_id using `ip_ed`yr'', keepusing(ed) keep(master match) nogenerate

	*claims without an ED revenue line are non-ED;
	replace ed = 0 if missing(ed)

	di "Replace missing admission/discharge dates"
	replace admsn_dt = from_dt if admsn_dt==.
	replace dschrgdt = thru_dt if dschrgdt==.
	drop from_dt thru_dt

	di "Drop stays longer than a year"
	*rows whose length of stay cannot be computed (missing date) are dropped here as well;
	keep if (dschrgdt - admsn_dt + 1) <= 365

	di "Keep only facility type hospital"
	keep if inlist(fac_type,"1","8")

	di "Keep only type of service: inpatient"
	keep if typesrvc=="1"

	drop fac_type typesrvc

	*discharges on or after 1 Oct 2015 are handled as ICD-10, earlier ones as ICD-9;
	gen icd10ind = dschrgdt>=td(1oct2015)

	di "create flag if non-deferrable admission"
	*ident_nondef is not defined in this file (presumably in the master program file); it has to create i_nondef;
	ident_nondef

	gen ed_nondef = ed * i_nondef

	di "only keep patients with so-called non-deferrable admissions;"
	gen dschrgqtr = qofd(dschrgdt)
	format dschrgqtr %tq

	di "number of nondef cases by quarter"
	tab dschrgqtr if i_nondef==1

	di "only retain non-deferrable admissions"
	keep if i_nondef==1

	*keep the original DRG code next to the numeric version used for the weight merge;
	gen drg_str = drg_cd
	destring drg_cd, force replace

	*2008-2012 claims use the 2010 weight table, 2013-2017 claims the 2015 table;
	local wtfile
	if inrange(`yr',2008,2012) local wtfile "`drgwt10'"
	if inrange(`yr',2013,2017) local wtfile "`drgwt15'"
	if "`wtfile'"!="" {
		di "Merge DRG weight"
		merge m:1 drg_cd using "`wtfile'", keepusing(drgwt) keep(master match) nogenerate
	}
	*drg merge ends;

	*create flag for elixhauser comorbidities;
	if `yr'!=2015 {
		*index(e) before 2015, index(10) after 2015;
		local icdver = cond(`yr'<2015, "e", "10")
		qui elixhauser icd_dgns_cd*, index(`icdver') smelix cmorb
		drop weightel*
	}
	else {
		*2015 mixes both coding systems: score each part separately, then merge the flags back;
		forvalues part = 0/1 {
			preserve

			keep if icd10ind==`part'
			local icdver = cond(`part'==0, "e", "10")
			qui elixhauser icd_dgns_cd*, index(`icdver') smelix cmorb
			keep bene_id clm_id dschrgdt elixsum ynel*
			tempfile elpart`part'
			save `elpart`part'', replace

			restore
		}

		*ICD-9 part first;
		merge 1:1 bene_id clm_id dschrgdt using `elpart0', keepusing(elixsum ynel*) keep(master match) nogenerate

		*ICD-10 part: update fills in the flags that are still missing after the first merge;
		merge 1:1 bene_id clm_id dschrgdt using `elpart1', keepusing(elixsum ynel*) update keep(1 3 4 5) nogenerate
	}

	*stack this year onto the running sample;
	if `yr'==`firstyr' {
		tempfile ip_sample
		save `ip_sample', replace
	}
	else {
		tempfile chunk`yr'
		save `chunk`yr'', replace
		use `ip_sample', clear
		append using `chunk`yr''
		save `ip_sample', replace
	}
}
*year loop ends;

sort bene_id admsn_dt dschrgdt

*calendar year of discharge;
gen year = year(dschrgdt)

di "summarize patient sample"
summ
tab bought_1is bought_1ss if never_is==0 & never_ss==0
tab never_is never_ss if bought_1is==0 & bought_1ss==0

save "$datapath/ip_sample", replace

********************************************************************************
*Step 3b: Grab demographics, birth/death, and enrollment info - modified how i grab final deathdt;

*Create beneficiary master file;
*The loop stacks one row per beneficiary-year (only beneficiaries in the inpatient sample) with a;
*12-character buyin string per year; the result is then reshaped to one row per beneficiary.;

forvalues yr = `prevyr'/`lastyr' {

	di "*******************"
	di "Year = `yr'"

	use `bm_vars' using "$claims/bsfbase/`yr'/bsfab`yr'", clear
	merge 1:m bene_id using "$datapath/ip_sample", keepusing(ed) keep(match) nogenerate

	di "Only keep patients appearing in the inpatient sample"
	drop ed

	*the 1:m merge repeats a beneficiary once per admission: back to one row each;
	bysort bene_id: keep if _n==1

	gen year = `yr'

	*one character per month, January to December;
	egen buyin = concat(buyin01-buyin12)
	drop buyin01-buyin12 buyin_mo

	if `yr'==`prevyr' {
		tempfile master
		save `master', replace
	}
	else {
		tempfile master`yr'
		save `master`yr'', replace
		use `master', clear
		append using `master`yr''
		save `master', replace
	}

}
*year loop ends;

*first and last year in which each beneficiary appears in the stacked file;
bysort bene_id (year): gen frstyr = year[1]
by bene_id: gen lastyr = year[_N]

di "Update death dates to incorporate info from earlier year"
*carry a known death date forward into later years where it is missing;
by bene_id: replace death_dt = death_dt[_n-1] if death_dt==. & death_dt[_n-1]!=.
*the value on the beneficiary's last row is taken as final;
by bene_id: gen final_deathdt = death_dt[_N]
format %td final_deathdt bene_dob
*ind_update_dth = 1 if any year reports a non-missing death date different from the final one;
by bene_id: gen updated_death = final_deathdt!=death_dt & death_dt!=. & final_deathdt!=.
by bene_id: egen ind_update_dth = max(updated_death)
drop updated_death death_dt
*verified that final_deathdt gets the last value of death_dt. lots of missing in final_deathdt because most people don't die.

di "Edit sex and race to be constant within a person based on first obs"
*a non-empty value on the previous row overwrites the current row, so the earliest value propagates;
foreach v in sex race {
	by bene_id: replace `v' = `v'[_n-1] if `v'[_n-1]!=""
}
by bene_id: replace bene_dob = bene_dob[_n-1] if bene_dob[_n-1]!=.

*one row per beneficiary with one buyin string per year;
reshape wide buyin, i(bene_id sex race bene_dob final_deathdt) j(year)

*years in which the beneficiary is absent get a 12-character X placeholder;
forvalues yr = `prevyr'/`lastyr' {
	replace buyin`yr'="XXXXXXXXXXXX" if buyin`yr'==""
}

save "$datapath/ip_bene_master", replace


*------------------------------------------------------------------------------;
*Now check enrollment;
*Flags (enr) admissions whose beneficiary was enrolled in both Part A and Part B in every month;
*of the look-back window: the admission month and the months before it, back_wind_len in total.;
*No rows are dropped on enr or on age here; the flags are saved for later use.;
local back_wind_len 12

use "$datapath/ip_sample", clear
merge m:1 bene_id using "$datapath/ip_bene_master", keepusing(final_deathdt bene_dob buyin* frstyr lastyr sex)

di "keep only those discharges for which we have enrollment info"
keep if _m==3
drop _merge

*age in completed years at admission;
gen ageatadmsn = int((admsn_dt - bene_dob)/365.25)

di "1. Set enr=0 if last year of bene master file ends before admission begins"
gen pre_enr=0 if lastyr < year(admsn_dt)
tab pre_enr

*single string with one character per month. every year from prevyr to lastyr contributes exactly;
*12 characters (absent years were filled with X), so character 1 is always January of prevyr;
egen buyin = concat(buyin*)
drop buyin`prevyr'-buyin`lastyr'

*FIX: month positions must be counted from January of prevyr, where the string starts, not from;
*the beneficiary's own first year. anchoring on frstyr shifted the look-back window to the wrong;
*months for every beneficiary first observed after prevyr.;
gen frst_mth = mofd(mdy(1,1,`prevyr'))
gen admsn_mth_pos = mofd(admsn_dt) - frst_mth +1
gen dschrg_mth_pos = mofd(dschrgdt) - frst_mth +1

di "2. set pre_enr=0 if look back period is less than a year"
*also guarantees that the substr start position below is at least 1;
replace pre_enr=0 if admsn_mth_pos < `back_wind_len'

gen back_window = "invalid" if pre_enr==0
replace back_window = substr(buyin,(admsn_mth_pos-`back_wind_len'+1),`back_wind_len') if pre_enr==.

di "3a. Not enrolled if not in parts A and B for 12 months prior to admission"
*any month coded 0, 1, 2, A or B, or an X placeholder (no record that year), disqualifies the window;
replace pre_enr=0 if regexm(back_window,"[012ABX]")

di "4. All remaining benes are enrolled as desired"	
gen enr=0
replace enr=1 if pre_enr==. 

save "$datapath/ip_enr_sample.dta", replace

********************************************************************************
*Step 4: Grab history/future inpatient stays and dx details for all relevant patients;
*Starts from the enrollment sample left in memory by the previous step, builds the list of;
*beneficiaries, then pulls every inpatient claim of those beneficiaries for prevyr-lastyr with;
*Elixhauser flags computed per claim.;

di "grab history for these patients and identify which ones had an admission in the prior 90 days;"
keep bene_id
bysort bene_id: keep if _n==1

tempfile patlist
save `patlist', replace

forvalues yr = `prevyr'/`lastyr' {

	di "*******************"
	di "Year = `yr'"

	*files before 2006 carry prncpal_dgns_cd directly; later files use the first dx code instead;
	local legacy = (`yr'<2006)
	local pdxvar = cond(`legacy', "prncpal_dgns_cd", "")
	use bene_id clm_id admsn_dt dschrgdt `pdxvar' icd_dgns_cd* using "$claims/ip/`yr'/ipc`yr'", clear
	if !`legacy' rename icd_dgns_cd1 prncpal_dgns_cd

	di "only retain patients in the non-deferrable sample"	
	merge m:1 bene_id using `patlist', keep(match) nogenerate

	*discharges on or after 1 Oct 2015 are handled as ICD-10 (a missing discharge date also evaluates as true here);
	gen icd10ind = dschrgdt>=td(1oct2015)

	*limit to first 10 dx codes to be consistent over full period;
	cap drop icd_dgns_cd11 icd_dgns_cd12 icd_dgns_cd13 icd_dgns_cd14 icd_dgns_cd15 icd_dgns_cd16 icd_dgns_cd17 icd_dgns_cd18 icd_dgns_cd19 icd_dgns_cd20 icd_dgns_cd21 icd_dgns_cd22 icd_dgns_cd23 icd_dgns_cd24 icd_dgns_cd25

	di "create elixhauser flags"
	*create flag for elixhauser comorbidities;
	if `yr'!=2015 {
		*index(e) before 2015, index(10) after 2015;
		local icdver = cond(`yr'<2015, "e", "10")
		qui elixhauser icd_dgns_cd*, index(`icdver') smelix
		drop weightel*
	}
	else {
		*2015 mixes both coding systems: score each part separately, then merge the flags back;
		forvalues part = 0/1 {
			preserve

			keep if icd10ind==`part'
			local icdver = cond(`part'==0, "e", "10")
			qui elixhauser icd_dgns_cd*, index(`icdver') smelix
			keep bene_id clm_id dschrgdt elixsum ynel*
			tempfile helpart`part'
			save `helpart`part'', replace

			restore
		}

		merge 1:1 bene_id clm_id dschrgdt using `helpart0', keepusing(elixsum ynel*) keep(master match) nogenerate

		*update fills in the flags that are still missing after the first merge;
		merge 1:1 bene_id clm_id dschrgdt using `helpart1', keepusing(elixsum ynel*) update keep(1 3 4 5) nogenerate
	}

	*prefix the 31 flags so they cannot collide with the index-stay flags later;
	forvalues k = 1/31 {
		rename ynel`k' histynel`k'
	}

	*stack this year onto the running history file;
	if `yr'==`prevyr' {
		tempfile history
		save `history', replace
	}
	else {
		tempfile hist`yr'
		save `hist`yr'', replace
		use `history', clear
		append using `hist`yr''
		save `history', replace
	}
}

sort bene_id dschrgdt
rename (dschrgdt admsn_dt prncpal_dgns_cd) (histdschrg histadmsn histprdx)
drop icd_dgns_cd*


save "$datapath/history_ip_sample", replace
*this file is called history but also has future stays which can be used to flag readmissions;
*this file is called history but also has future stays which can be used to flag readmissions;

********************************************************************************
*Step 5: create history flags;
*For every index admission, joins all stays of the same beneficiary discharged in the 365 days;
*ending the day before admission, and reduces them to one row per admission holding the;
*30/90/365-day prior-use flags and the maximum of each historic Elixhauser flag.;
use bene_id clm_id admsn_dt using "$datapath/ip_enr_sample", clear

*look-back window on the historic discharge date: stop the day prior to admission;
gen low = admsn_dt - 365
gen high = admsn_dt - 1

di "merge history file with main file and grab 365-day inpatient history"

rangejoin histdschrg low high using "$datapath/history_ip_sample", by(bene_id) keepusing(histdschrg histynel*)

sort bene_id clm_id histdschrg

di "create flag for inpatient use in the past 90 days"
*number of days behind each flag suffix;
local days30d 30
local days90d 90
local days1y 365
foreach w in 30d 90d 1y {
	*admissions without any joined stay have a missing histdschrg, for which the comparison is false (0);
	gen i_hist`w' = (admsn_dt - histdschrg) <= `days`w''
	by bene_id clm_id: egen hist`w' = max(i_hist`w')
	drop i_hist`w'
}

*one row per index admission;
gcollapse (max) histynel* (mean) hist30d hist90d hist1y admsn_dt, by(bene_id clm_id)

*Create sum of historic elixhauser flags;
egen histelsum = rowtotal(histynel1-histynel31)

tempfile histcollapse
save `histcollapse', replace

********************************************************************************
*Step 6: create readmission flags;
*For every index discharge, joins all stays of the same beneficiary admitted 1-90 days after;
*discharge and reduces them to one row per index stay with readmission flags at six horizons.;
use bene_id clm_id dschrgdt using "$datapath/ip_enr_sample", clear

*count readmission duration from discharge date instead of admission date. start from next day after discharge.;
gen low = dschrgdt + 1
gen high = dschrgdt + 90

di "merge history file with main file and grab 90-day readmissions"

rangejoin histadmsn low high using "$datapath/history_ip_sample", by(bene_id) keepusing(histadmsn)

sort bene_id clm_id histadmsn

di "create flags for readmission at different durations"
*days from discharge to each later admission; missing when there is none in the window;
gen diff_readm = histadmsn - dschrgdt
by bene_id clm_id: egen min_diff_readm = min(diff_readm)

*horizons 15, 30, ..., 90 days; a missing gap compares as false, so stays without a later admission get 0;
forvalues durn = 15(15)90 {
	gen i_readm`durn'd = histadmsn - dschrgdt <= `durn'
	by bene_id clm_id: egen readm`durn'd = max(i_readm`durn'd)
	drop i_readm`durn'd
}

*one row per index stay;
gcollapse (mean) readm*d dschrgdt min_diff_readm, by(bene_id clm_id)

tempfile readm
save `readm', replace

********************************************************************************
*Step 7: merge history and readmission flags back into main file;

use "$datapath/ip_enr_sample", clear
*the index-stay Elixhauser dummies are removed; only the history-based ones are attached below;
drop ynel*

di "merge history of 90-d inpatient use and 1 yr elixhauser dummies"
merge 1:1 bene_id clm_id admsn_dt using `histcollapse', keepusing(histynel* histelsum hist30d hist90d hist1y) keep(master match) nogenerate

*anything still missing after the merge means no recorded history: set to 0;
foreach v of varlist histynel* hist30d hist90d hist1y histelsum {
	replace `v' = 0 if missing(`v')
}

di "merge readmission flags"
merge 1:1 bene_id clm_id dschrgdt using `readm', keepusing(readm*d min_diff_readm) keep(master match) nogenerate

*remove secondary dx codes and the intermediate variables of the enrollment check;
drop icd_dgns_cd2-icd_dgns_cd10 admsn_mth_pos dschrg_mth_pos back_window pre_enr buyin

di "save final file with death date, admsn_dt, historical use, elixhauser dummies based on historical use, and readmission flags"
summ

save "$datapath/ip_enr_hist_readm_sample", replace


*END CODE;
